# Load the saved tweet data set.
cf <- readRDS("data/campfire-tweets-2020-04-17.Rds")

# Keep tweets whose screen name matches one of the listed agency handles.
Sources <- filter(
  cf,
  str_detect(screen_name, "CALFIRE_ButteCo|Cal_Fire|ButteSheriff|ChicoPolice|ChicoFD|CountyOfButte|Paradise_CA")
)

# A negative n makes top_n() keep the rows with the smallest values, so this
# retains the 29 earliest tweets (more if timestamps tie at the cutoff).
no_outliers <- Sources %>% top_n(-29, created_at_pst)

# Earliest tweet time for each account that remains after trimming.
summarize(group_by(no_outliers, screen_name), min(created_at_pst))
## # A tibble: 3 x 2
## screen_name `min(created_at_pst)`
## <chr> <dttm>
## 1 ButteSheriff 2018-11-08 08:03:55
## 2 CALFIRE_ButteCo 2018-11-08 06:51:47
## 3 ChicoFD 2018-11-08 07:46:17
# Treat the account name as a categorical variable for plotting.
no_outliers <- mutate(no_outliers, screen_name = as.factor(screen_name))

# Stacked columns of tweet counts per hour, filled by account. Counts are
# tallied per (hour, account, minute); geom_col() stacks the per-minute pieces
# within each hour.
no_outliers %>%
  count(tweet_hour, screen_name, tweet_min, name = "tweet_count") %>%
  ggplot(aes(x = tweet_hour, y = tweet_count, fill = screen_name)) +
  geom_col()
# Earliest and latest tweet time across all of Sources (before trimming).
range(Sources[["created_at_pst"]])
## [1] "2018-11-08 06:51:47 PST" "2018-12-19 13:46:14 PST"
# Histogram of one engagement metric, stacked by account.
#
# tweets: data frame with a `screen_name` column and the metric column.
# metric: name of the count column to plot, given as a string.
#
# Rows where the metric is not greater than 1 are dropped before plotting.
# The x-axis label is set explicitly so it shows the column name rather than
# the `.data[[metric]]` expression.
plot_engagement <- function(tweets, metric) {
  tweets %>%
    filter(.data[[metric]] > 1) %>%
    ggplot(aes(x = .data[[metric]], fill = screen_name)) +
    geom_histogram() +
    labs(x = metric)
}

plot.fav <- plot_engagement(no_outliers, "favorite_count")
plot.rt <- plot_engagement(no_outliers, "retweet_count")
plot.quo <- plot_engagement(no_outliers, "quote_count")
plot.rply <- plot_engagement(no_outliers, "reply_count")

# Arrange the four histograms in a 2 x 2 grid.
gridExtra::grid.arrange(plot.fav, plot.rt, plot.quo, plot.rply, nrow = 2)
Second Plot Type
library(vistime)
library(plotly)
# Insert line breaks into the tweet text. Each rule is a
# (pattern, replacement) pair applied in order with gsub():
#   1. after a period followed by a space
#   2. before a "#" at the very start of the text
#   3. before an "@" at the very start of the text
#   4. after one or more ": " sequences
#   5. before every "http"
no_outliers$text <- Reduce(
  function(txt, rule) gsub(rule[[1]], rule[[2]], txt),
  list(
    c("(\\. )", "\\.\n"),
    c("(^\\#)", "\n\\#"),
    c("(^\\@)", "\n\\@"),
    c("(\\: )+", "\\:\n"),
    c("(http)", "\nhttp")
  ),
  no_outliers$text
)
# Build a timeline with vistime: tweet text as the events, accounts as the
# groups, and the tweet timestamp as the start; event labels are hidden.
# NOTE(review): `color` is given a hex code here; confirm that the installed
# vistime version treats it as a literal colour rather than a column name.
time <- vistime(
  no_outliers,
  events = "text", groups = "screen_name",
  start = "created_at_pst", show_labels = FALSE, color = "#0bc8e0"
)
timeline <- plotly_build(time)

# Margin settings passed to layout().
m <- list(l = 50, r = 50, b = 100, t = 100, pad = 4)

# Fixed-size canvas. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
h <- timeline %>%
  layout(autosize = FALSE, width = 1100, height = 600, margin = m)
h
And another
library(timelineS)
#timelineS(no_outliers, main = "Life of Michael Jackson")
# One row per account whose screen name or description contains "news"/"News"
# and that is verified, ordered by follower count (largest first).
# `verified == "TRUE"` matches whether the column is logical or character,
# because a logical is coerced to character for the comparison.
news_orgs <- cf %>%
  users_data() %>%
  distinct(screen_name, .keep_all = TRUE) %>%
  filter(
    str_detect(screen_name, "news|News") |
      str_detect(description, "news|News")
  ) %>%
  filter(verified == "TRUE") %>%
  arrange(desc(followers_count))

# Split the tweets by account using the same key on both sides, so every tweet
# lands in exactly one of the two groups. Selecting `news` by per-tweet profile
# fields instead could drop or duplicate tweets when an account's profile
# differs between rows.
news <- semi_join(x = cf, y = news_orgs, by = "screen_name")
news$user_type <- "news"
public <- anti_join(x = cf, y = news_orgs, by = "screen_name")
public$user_type <- "public"

# Rebuild cf with the user_type label attached (public rows first).
cf <- rbind(public, news)
# Tweet counts per news account, most active first, limited to the first 20.
top.20.users <- news %>%
  count(screen_name, name = "n", sort = TRUE) %>%
  slice(1:20)

# Horizontal bar chart of those counts, bars ordered by count.
top.20.users %>%
  ggplot(aes(x = reorder(screen_name, -n), y = n)) +
  geom_col(fill = "darkslategray") +
  coord_flip() +
  labs(x = "Users", y = "Count") +
  theme_minimal()
I’m interested in how sentiment differs between accounts that are considered news outlets and the general public. We plan to look more closely at individual public officials, such as the Sheriff, when we go through this more thoroughly in our project.
# Keep only the words that appear in the AFINN lexicon (joined on the columns
# the two tables share).
ts1 <- inner_join(tweet_words_nostop, get_sentiments("afinn"))

# Per-tweet sentiment: the sum of `value` over that tweet's matched words.
ts2 <- summarize(group_by(ts1, status_id), sentiment = sum(value))

# Attach the score to the Sources tweets; tweets with no matched words get NA.
cf2 <- left_join(Sources, ts2, by = "status_id")

# Density of tweet sentiment, one curve per account.
cf2 %>%
  ggplot(aes(x = sentiment, colour = screen_name)) +
  geom_density(lwd = 2) +
  theme_minimal()